Chapter 13 Source of functional differences

# A tibble: 3 × 2
  type_river count
  <chr>      <int>
1 core          25
2 endemic        7
3 marginal      11
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************

13.1 Taxonomic variation

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  group_by(type,sample) %>% 
  summarise(fraction=sum(count)) %>% 
  group_by(type) %>% 
  summarise(mean(fraction))
`summarise()` has grouped output by 'type'. You can override using the `.groups` argument.
# A tibble: 3 × 2
  type     `mean(fraction)`
  <chr>               <dbl>
1 core               0.901 
2 endemic            0.132 
3 marginal           0.0874

13.2 MAGs in different locations and shared among locations

13.2.1 Core

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="core") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

13.2.2 Endemic

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="endemic") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    ylim(0,1)+
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

13.2.3 Marginal

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  left_join(core_microbiota,by="genome") %>% 
  filter(type=="marginal") %>% 
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    ylim(0,1)+
    scale_fill_manual(values=phylum_colors) +
    facet_nested(~factor(environment, labels=c("low" = "Low altitude", "high" = "High altitude")),  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 12, lineheight = 0.6,face="bold"),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

13.2.4 Ordination

gift_pcoa <- genome_gifts %>%
    to.elements(., GIFT_db) %>%
    as.data.frame() %>%
    vegdist(method="euclidean") %>%
    pcoa()

gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]


# Get genome positions
gift_pcoa_vectors <- gift_pcoa$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2) # keep the first 2 axes

gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]

gift_pcoa_gifts <- cov(genome_gifts, scale(gift_pcoa_vectors)) %*% diag((gift_pcoa_eigenvalues/(nrow(genome_gifts)-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(!label %in% c("S01","S02","S03"))
set.seed(101)
scale <- 20 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(genome_metadata, by="genome") %>%
  ggplot() +
      #genome positions
      scale_color_manual(values=phylum_colors)+
      geom_point(aes(x=Axis.1,y=Axis.2, color=phylum, size=length), 
                 alpha=0.9, shape=16) +
      #scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.1,5)) +
      #loading positions
      geom_segment(data=gift_pcoa_gifts, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = gift_pcoa_gifts,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    xlim(-4.5,4.5) + 
    ylim(-3,2.5) +
    theme_minimal() + 
    theme(legend.position = "none")

structural <- core_microbiota %>% 
  mutate(type = case_when(
      (type_prevalence == "endemic") & (type_prevalence_environment == "high") ~ "present_high",
       (type_prevalence == "endemic") & (type_prevalence_environment == "low") ~ "present_low")) %>% 
    filter(!is.na(type)) %>% 
    select(genome,type)

enriched <- ancom_mag_skin_table %>% 
  mutate(type = case_when(
      lfc_environmentlow < 0 ~ "enriched_low",
      lfc_environmentlow >= 0 ~ "enriched_high")) %>% 
  select(genome,type)


gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(bind_rows(structural,enriched), by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  mutate(type=ifelse(is.na(type),"neutral",type)) %>%
  group_by(type) %>%
  mutate(x_cen = median(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = median(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
      scale_size_continuous(range = c(0.1,5)) +
      geom_point(aes(x=Axis.1,y=Axis.2, color=type, size=length), alpha=0.9, shape=16) +
      geom_segment(aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=type), alpha = 0.5) +
      scale_color_manual(values=c("#5A99D2","#F16144","#B7B7B7","#225072","#8C2C1C"))+
    theme_minimal()

scale <- 15 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(core_microbiota, by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  filter(type != "absent") %>% 
  mutate(shape = case_when(
      type == "core" ~ "core",
      type == "marginal" ~ "marginal",
      type == "endemic" & type_prevalence_environment == "high" ~ "endemic_high",
      type == "endemic" & type_prevalence_environment == "low" ~ "endemic_low"
    )) %>% 
  group_by(shape) %>%
  mutate(x_cen = median(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = median(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot() +
      #genome positions
      geom_segment(aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=shape), alpha = 0.5, show.legend = FALSE) +
      scale_color_manual(values=c("#9E8F71","#5A99D2","#F16144","#B7B7B7"))+
      theme_minimal() 

   # theme(legend.position = "none")
# A tibble: 33 × 5
   gift   high   low    p_value p_adjust
   <chr> <dbl> <dbl>      <dbl>    <dbl>
 1 B0104 0.681 0.824 0.00578    0.0279  
 2 B0106 0.894 0.747 0.00228    0.0123  
 3 B0208 0.476 0.720 0.000794   0.00631 
 4 B0213 0.582 0.795 0.00000629 0.000788
 5 B0220 0.179 0.311 0.00770    0.0359  
 6 B0302 0.233 0.457 0.00193    0.0108  
 7 B0307 0.370 0.201 0.000193   0.00260 
 8 B0309 0.134 0.269 0.000956   0.00679 
 9 B0402 0.222 0.357 0.000658   0.00631 
10 B0403 0.264 0.398 0.0116     0.0474  
# ℹ 23 more rows

term df SumOfSqs R2 statistic p.value
environment 1 14.464879 0.1785608 6.438403 0.001
river 2 8.130211 0.1003629 1.809402 0.097
Residual 26 58.413069 0.7210764 NA NA
Total 29 81.008159 1.0000000 NA NA

gift high low p_value p_adjust
B0104 0.67421663 1.0000000 0.0018754356 0.005672049
B0105 0.73759193 1.0000000 0.0018754356 0.005672049
B0106 0.80621259 1.0000000 0.0017459182 0.005672049
B0204 0.48537488 0.8606981 0.0145424059 0.028623148
B0206 0.61242517 1.0000000 0.0017937971 0.005672049
B0207 0.43143388 0.7100000 0.0019749754 0.005695278
B0209 0.79386259 1.0000000 0.0018754356 0.005672049
B0210 0.55247656 0.8606981 0.0145424059 0.028623148
B0213 0.61084133 0.8944683 0.0145424059 0.028623148
B0214 0.22485035 0.6834048 0.0219467710 0.041233327
B0218 0.46423032 0.7889365 0.0256749426 0.046819013
B0220 0.07385765 0.7171750 0.0010154015 0.005672049
B0601 0.54689170 0.9155746 0.0079058708 0.017824145
B0602 0.63919238 0.9155746 0.0145424059 0.028623148
B0603 0.66787867 1.0000000 0.0018754356 0.005672049
B0604 0.43445679 1.0000000 0.0018754356 0.005672049
B0605 0.00000000 0.4221269 0.0045906627 0.011161611
B0702 0.66854993 1.0000000 0.0018754356 0.005672049
B0703 0.07420061 0.6700000 0.0004222482 0.005672049
B0704 0.48294837 1.0000000 0.0018754356 0.005672049
B0706 0.41331224 1.0000000 0.0018754356 0.005672049
B0707 0.76551369 1.0000000 0.0018754356 0.005672049
B0710 0.06335476 0.1400000 0.0007537527 0.005672049
B0711 0.14734772 0.2664340 0.0010154015 0.005672049
B0712 0.71438862 0.3700429 0.0020470591 0.005768985
B0803 0.73937330 1.0000000 0.0018754356 0.005672049
B0804 0.52266792 0.8606981 0.0145424059 0.028623148
B1028 0.03147905 0.1400000 0.0014496951 0.005672049
B1029 0.00000000 0.1444683 0.0045906627 0.011161611
D0102 0.33798384 1.0000000 0.0018754356 0.005672049
D0103 0.16414075 0.6700000 0.0063405256 0.014834437
D0104 0.31230836 0.8000000 0.0019585157 0.005695278
D0201 0.06134920 0.8100000 0.0007255498 0.005672049
D0202 0.16943443 0.8037702 0.0010154015 0.005672049
D0203 0.22479340 0.4890978 0.0008505648 0.005672049
D0205 0.03972843 0.3057359 0.0010154015 0.005672049
D0206 0.00000000 0.6115146 0.0002918961 0.005672049
D0207 0.11256387 0.6128250 0.0010154015 0.005672049
D0208 0.13134920 0.4595489 0.0009808556 0.005672049
D0209 0.14134920 0.6079485 0.0010066900 0.005672049
D0210 0.19001248 0.2500000 0.0018754356 0.005672049
D0211 0.00000000 0.2195918 0.0045906627 0.011161611
D0213 0.00000000 0.3093019 0.0002918961 0.005672049
D0302 0.00000000 0.3300000 0.0001880890 0.005672049
D0306 0.11242517 0.5000000 0.0013515402 0.005672049
D0308 0.05621259 0.4422127 0.0007395593 0.005672049
D0309 0.25000000 0.6055317 0.0007325316 0.005672049
D0505 0.21728768 0.5264768 0.0145424059 0.028623148
D0506 0.16892979 0.5000000 0.0018099404 0.005672049
D0507 0.19124896 0.6273362 0.0010154015 0.005672049
D0508 0.08094613 0.2586895 0.0219467710 0.041233327
D0509 0.48650715 0.6444683 0.0223620729 0.041386523
D0511 0.32436673 1.0000000 0.0018754356 0.005672049
D0512 0.22485035 0.6834048 0.0219467710 0.041233327
D0513 0.06335476 0.3709022 0.0010154015 0.005672049
D0601 0.36577691 0.5000000 0.0018754356 0.005672049
D0604 0.00000000 0.4221269 0.0045906627 0.011161611
D0610 0.06917013 0.0000000 0.0051195000 0.012208038
D0611 0.11242517 0.5000000 0.0013515402 0.005672049
D0613 0.87641709 1.0000000 0.0126508551 0.027521158
D0704 0.56941341 1.0000000 0.0018754356 0.005672049
D0708 0.11242517 0.3944683 0.0120900251 0.026770770
D0804 0.67738433 0.0000000 0.0006075647 0.005672049
D0807 0.69205678 0.1544254 0.0020946080 0.005771809
D0813 0.00000000 0.2889365 0.0045906627 0.011161611
D0816 0.09668565 0.4820515 0.0065015311 0.014929442
D0901 0.00000000 0.4221269 0.0045906627 0.011161611
D0907 0.22485035 1.0000000 0.0013515402 0.005672049

term df SumOfSqs R2 statistic p.value
environment 1 49.92847 0.4416516 12.05864 0.001
river 2 13.43533 0.1188447 1.62244 0.187
Residual 12 49.68566 0.4395037 NA NA
Total 15 113.04945 1.0000000 NA NA

# A tibble: 0 × 5
# ℹ 5 variables: gift <chr>, high <dbl>, low <dbl>, p_value <dbl>, p_adjust <dbl>

term df SumOfSqs R2 statistic p.value
environment 1 12.004006 0.2843473 2.144208 0.106
river 1 7.818642 0.1852056 1.396600 0.318
Residual 4 22.393358 0.5304471 NA NA
Total 6 42.216006 1.0000000 NA NA